Package loading
source("../src/functions.R")
Data loading
count <- read_delim("../data/count.txt.gz", delim="\t")
##
## ── Column specification ───────────────────────────────────────────────
## cols(
## .default = col_double(),
## gene_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
expdesign <- read_delim("../data/expdesign.txt.gz", delim="\t", skip=6)
##
## ── Column specification ───────────────────────────────────────────────
## cols(
## index = col_double(),
## sequencing_batch = col_double(),
## amplification_batch = col_double(),
## mouse_ID = col_logical(),
## pool_barcode = col_character(),
## sample_barcode = col_character(),
## plate_id = col_double(),
## well_id = col_character(),
## number_of_cells = col_character(),
## sorting_markers = col_character(),
## RMT_length = col_double(),
## group_name = col_character(),
## ERCC_dilution = col_double(),
## ERCC_volume_ul = col_double(),
## Column_name_in_processed_data_file = col_character()
## )
## Warning: 3072 parsing failures.
## row col expected actual file
## 1527 mouse_ID 1/0/T/F/TRUE/FALSE 4 '../data/expdesign.txt.gz'
## 1528 mouse_ID 1/0/T/F/TRUE/FALSE 4 '../data/expdesign.txt.gz'
## 1529 mouse_ID 1/0/T/F/TRUE/FALSE 4 '../data/expdesign.txt.gz'
## 1530 mouse_ID 1/0/T/F/TRUE/FALSE 4 '../data/expdesign.txt.gz'
## 1531 mouse_ID 1/0/T/F/TRUE/FALSE 4 '../data/expdesign.txt.gz'
## .... ........ .................. ...... ..........................
## See problems(...) for more details.
Wide -> Long
# Merge
# Filtering non single-cells / zero count cells
count %>%
pivot_longer(-gene_name, names_to="cell", values_to="exp") %>%
inner_join(expdesign,
by=c("cell"="Column_name_in_processed_data_file")) %>%
rename_all(tolower) %>%
group_by(cell) %>%
mutate(sum=sum(exp)) %>%
filter(sum != 0 &&
number_of_cells == 1 &&
group_name %in%
c("B cell", "CD8+pDC",
"monocyte_or_neutrophil", "NK_cell")) %>%
ungroup %>%
arrange(cell) -> marsdata
Long -> Wide
# Extract gene expression part
marsdata %>%
select(gene_name, cell, exp) %>%
pivot_wider(names_from="cell", values_from="exp") -> wide_marsdata
# Gene name
wide_marsdata %>%
select(gene_name) %>%
data.frame %>%
.[,1] -> genename
# Expression matrix -> SingleCellExperiment
wide_marsdata %>%
select(!gene_name) %>%
as.matrix %>%
SingleCellExperiment(assays=list(counts=.[])) -> sce_marsdata
# rownames
genename -> rownames(sce_marsdata)
# coldata
marsdata %>%
select(!c(gene_name, exp)) %>%
distinct %>%
arrange(cell) %>%
DataFrame -> colData(sce_marsdata)
scater
# Analysis workflow of scater
sce_marsdata %>%
logNormCounts %>%
runPCA(ntop=2000, ncomponents=10) %>%
runTSNE(dimred = "PCA") %>%
runUMAP(dimred = "PCA") -> sce_marsdata
Visualization
Standard plot functions of R
pairs(reducedDim(sce_marsdata, "PCA"),
col=factor(colData(sce_marsdata)$group_name),
pch=16, cex=0.5, main="PCA")

plot(reducedDim(sce_marsdata, "TSNE"),
col=factor(colData(sce_marsdata)$group_name),
xlab="tSNE1", ylab="tSNE2",
pch=16, cex=2, main="tSNE")

# Pair/scatter plot (scater)
sce_marsdata %>%
plotReducedDim(dimred="PCA", colour_by="group_name", ncomponents=10)

sce_marsdata %>%
plotReducedDim(dimred="TSNE", colour_by="group_name")

sce_marsdata %>%
plotReducedDim(dimred="UMAP", colour_by="group_name")

Scatter plot (schex)
sce_marsdata %>%
make_hexbin(nbins=40, dimension_reduction="TSNE") %>%
plot_hexbin_feature(feature="Cd8a", type="logcounts",
action="mean", xlab="tSNE1", ylab="tSNE2",
title=paste0("Mean of Cd8a"))

ggplot2
reducedDim(sce_marsdata, "TSNE") %>%
cbind(colData(sce_marsdata)$group_name) %>%
data.frame %>%
mutate_at(vars(-X3), as.numeric) %>%
ggplot(aes(x=X1, y=X2, color=X3)) +
geom_point() +
xlab("PC1") +
ylab("PC2") +
theme(legend.title = element_blank())

ggpairs
reducedDim(sce_marsdata, "PCA") %>%
cbind(colData(sce_marsdata)$group_name) %>%
data.frame %>%
mutate_at(vars(-V11), as.numeric) %>%
ggpairs(columns=1:10, aes(color=V11))

pairsD3
reducedDim(sce_marsdata, "PCA") %>%
.[,1:5] %>%
pairsD3(group=colData(sce_marsdata)$group_name,
tooltip=colData(sce_marsdata)$cell)
Plotly
reducedDim(sce_marsdata) %>%
as_tibble %>%
select(PC1, PC2, PC3) %>%
data.frame %>%
plot_ly(x=~PC1, y=~PC2, z=~PC3,
type = "scatter3d", mode = "markers",
text = colData(sce_marsdata)$cell,
color =~colData(sce_marsdata)$group_name
)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
iSEE
# app <- iSEE(sce_marsdata)
# runApp(app)
Session info
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS/LAPACK: /Users/tsuyusakikouki/opt/anaconda3/envs/r-4.0/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] C/UTF-8/C/C/C/C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] schex_1.2.0 Seurat_3.2.2
## [3] ggbeeswarm_0.6.0 styler_1.3.2
## [5] formatR_1.7 lintr_2.0.1
## [7] knitr_1.30 BiocStyle_2.16.0
## [9] rmarkdown_2.5 pairsD3_0.1.0
## [11] shiny_1.5.0 iSEE_2.0.0
## [13] plotly_4.9.2.1 GGally_2.0.0
## [15] scater_1.16.0 SingleCellExperiment_1.10.1
## [17] SummarizedExperiment_1.18.1 DelayedArray_0.14.0
## [19] matrixStats_0.57.0 Biobase_2.48.0
## [21] GenomicRanges_1.40.0 GenomeInfoDb_1.24.0
## [23] IRanges_2.22.1 S4Vectors_0.26.0
## [25] BiocGenerics_0.34.0 forcats_0.5.0
## [27] stringr_1.4.0 dplyr_1.0.2
## [29] purrr_0.3.4 readr_1.4.0
## [31] tidyr_1.1.2 tibble_3.0.4
## [33] ggplot2_3.3.2 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] shinydashboard_0.7.1 reticulate_1.18
## [3] tidyselect_1.1.0 htmlwidgets_1.5.2
## [5] grid_4.0.3 BiocParallel_1.22.0
## [7] Rtsne_0.15 munsell_0.5.0
## [9] codetools_0.2-17 ica_1.0-2
## [11] DT_0.16 future_1.19.1
## [13] miniUI_0.1.1.1 withr_2.3.0
## [15] colorspace_1.4-1 rstudioapi_0.11
## [17] ROCR_1.0-11 tensor_1.5
## [19] shinyWidgets_0.5.4 listenv_0.8.0
## [21] labeling_0.4.2 GenomeInfoDbData_1.2.3
## [23] polyclip_1.10-0 farver_2.0.3
## [25] rprojroot_1.3-2 vctrs_0.3.4
## [27] generics_0.0.2 xfun_0.18
## [29] R6_2.4.1 clue_0.3-57
## [31] rsvd_1.0.3 concaveman_1.1.0
## [33] rex_1.2.0 spatstat.utils_1.17-0
## [35] bitops_1.0-6 reshape_0.8.8
## [37] shinyAce_0.4.1 assertthat_0.2.1
## [39] promises_1.1.1 scales_1.1.1
## [41] beeswarm_0.2.3 gtable_0.3.0
## [43] globals_0.13.1 goftest_1.2-2
## [45] processx_3.4.4 rlang_0.4.7
## [47] cyclocomp_1.1.0 GlobalOptions_0.1.2
## [49] splines_4.0.3 lazyeval_0.2.2
## [51] hexbin_1.28.1 broom_0.7.2
## [53] abind_1.4-5 reshape2_1.4.4
## [55] BiocManager_1.30.10 yaml_2.2.1
## [57] modelr_0.1.8 crosstalk_1.1.0.1
## [59] backports_1.1.10 httpuv_1.5.4
## [61] tools_4.0.3 ellipsis_0.3.1
## [63] RColorBrewer_1.1-2 ggridges_0.5.2
## [65] Rcpp_1.0.5 plyr_1.8.6
## [67] zlibbioc_1.34.0 RCurl_1.98-1.2
## [69] ps_1.4.0 deldir_0.1-29
## [71] rpart_4.1-15 pbapply_1.4-3
## [73] GetoptLong_1.0.4 viridis_0.5.1
## [75] cowplot_1.1.0 zoo_1.8-8
## [77] haven_2.3.1 ggrepel_0.8.2
## [79] cluster_2.1.0 fs_1.5.0
## [81] magrittr_1.5 RSpectra_0.16-0
## [83] data.table_1.13.2 circlize_0.4.10
## [85] colourpicker_1.1.0 lmtest_0.9-38
## [87] reprex_0.3.0 RANN_2.6.1
## [89] fitdistrplus_1.1-1 hms_0.5.3
## [91] patchwork_1.0.1 shinyjs_2.0.0
## [93] mime_0.9 evaluate_0.14
## [95] xtable_1.8-4 readxl_1.3.1
## [97] gridExtra_2.3 shape_1.4.5
## [99] compiler_4.0.3 KernSmooth_2.23-17
## [101] crayon_1.3.4 entropy_1.2.1
## [103] htmltools_0.5.0 mgcv_1.8-33
## [105] later_1.1.0.1 lubridate_1.7.9
## [107] DBI_1.1.0 tweenr_1.0.1
## [109] dbplyr_1.4.4 ComplexHeatmap_2.4.2
## [111] MASS_7.3-53 Matrix_1.2-18
## [113] cli_2.1.0 igraph_1.2.6
## [115] pkgconfig_2.0.3 xml2_1.3.2
## [117] vipor_0.4.5 XVector_0.28.0
## [119] rvest_0.3.6 callr_3.5.1
## [121] digest_0.6.26 sctransform_0.3.1
## [123] RcppAnnoy_0.0.16 spatstat.data_1.4-3
## [125] cellranger_1.1.0 leiden_0.3.4
## [127] rintrojs_0.2.2 uwot_0.1.8
## [129] DelayedMatrixStats_1.10.0 rjson_0.2.20
## [131] lifecycle_0.2.0 nlme_3.1-149
## [133] jsonlite_1.7.1 BiocNeighbors_1.6.0
## [135] desc_1.2.0 viridisLite_0.3.0
## [137] fansi_0.4.1 pillar_1.4.6
## [139] lattice_0.20-41 fastmap_1.0.1
## [141] httr_1.4.2 survival_3.2-7
## [143] glue_1.4.2 remotes_2.2.0
## [145] FNN_1.1.3 spatstat_1.64-1
## [147] png_0.1-7 ggforce_0.3.2
## [149] stringi_1.5.3 blob_1.2.1
## [151] BiocSingular_1.4.0 irlba_2.3.3
## [153] future.apply_1.6.0